import json
import os
import re
from urllib.parse import urljoin, urlsplit

import requests
from bs4 import BeautifulSoup

from config import PROJECT_DIR

# Scheme and host of the target site. Site-relative links (record "linkurl"
# values, attachment hrefs) are prefixed with it to form absolute URLs.
baseUrl = "https://ggzy.zj.gov.cn"
# Full-text search REST endpoint that loadJsgcZbgg POSTs its JSON query to.
# NOTE(review): "inteligentsearch" is the spelling in the path as written here —
# presumably it matches the server's actual route, so do not "correct" it.
url = f"{baseUrl}/inteligentsearch/rest/esinteligentsearch/getFullTextDataNew"


def loadJsgcZbgg(pageIndex: int = 1, region: str = "3301",
                 startTime: str = "2024-01-01 00:00:00",
                 endTime: str = "2024-12-04 23:59:59",
                 timeout: float = 30):
    """Query one page of the search API and return the parsed records.

    Sends a JSON POST to the module-level ``url`` asking for up to 120
    records of category ``002001001`` whose ``infoc`` field matches
    ``region`` and whose ``webdate`` lies inside ``[startTime, endTime]``,
    then hands the raw response text to ``parseReponse``.

    Args:
        pageIndex: 1-based page number to fetch.
        region: Value matched against the ``infoc`` field. The sample data
            shows ``"3301"`` matching codes such as ``330101`` — presumably
            a prefix match on an administrative region code; confirm
            against the API.
        startTime: Lower bound of the ``webdate`` filter,
            formatted ``YYYY-MM-DD HH:MM:SS``.
        endTime: Upper bound of the ``webdate`` filter, same format.
        timeout: Seconds to wait for the server before giving up, so a
            stalled connection cannot hang the whole crawl.

    Returns:
        Whatever ``parseReponse`` returns for the response body: a list of
        ``{"sectionNo": ..., "url": ...}`` dicts.

    Raises:
        requests.RequestException: On connection failure or timeout.
    """
    pagesize = 120
    # NOTE(review): page 1 sends pn=1. If the API treats "pn" as a zero-based
    # offset, the very first record is never returned — confirm against the API
    # before changing, since existing result files were produced this way.
    startIndex = (pageIndex - 1) * pagesize + 1
    data = {
        "token": "",
        "pn": startIndex,
        "rn": pagesize,
        "sdt": "",
        "edt": "",
        "wd": "",
        "inc_wd": "",
        "exc_wd": "",
        "fields": "title",
        "cnum": "001",
        "sort": "{\"webdate\":\"0\"}",
        "ssort": "title",
        "cl": 200,
        "terminal": "",
        "condition": [
            {"fieldName": "categorynum", "isLike": True, "likeType": 2, "equal": "002001001"},
            {"fieldName": "infoc", "isLike": True, "likeType": 2, "equal": region},
        ],
        "time": [{"fieldName": "webdate", "startTime": startTime, "endTime": endTime}],
        "highlights": "",
        "statistics": None,
        "unionCondition": None,
        "accuracy": "",
        "noParticiple": "0",
        "searchRange": None,
        "isBusiness": "1",
    }
    headers = {
        "Content-Type": "application/json;charset=UTF-8",
        "X-Requested-With": "XMLHttpRequest",
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/130.0.0.0 Safari/537.36"
    }
    # requests has no default timeout; without one a dead connection blocks forever.
    response = requests.post(url, json=data, headers=headers, timeout=timeout)
    return parseReponse(response.text)


# Captured sample response body, used by parseReponse() when it is called
# without any text (offline/debug use). The capture contains raw line breaks
# inside some string values, which is why it must be decoded with strict=False.
_SAMPLE_RESPONSE_TEXT = r"""
        {"result":{"categorys":[{"categorynum":"001","count":"2402","categoryname":"公共资源网站"}],"totalcount":2402,"records":[{"categorynum":"002001001","titlenew":"杭州市城市轨道交通18号线一期工程（含杭州市城市轨道交通3号线二期、9号线二期工程）装配式消防泵房、潜污泵、电动蝶阀设备[A3301010060524921001291]","infoid":"002001001e92bfa1b-0450-4585-b488-0c8e3c4772ba","attachname":"","infoa":"A99","sysclicktimes":0,"zcinfof":"","title":"杭州市城市轨道交通18号线一期工程（含杭州市城市轨道交通3号线二期、9号线二期工程）装配式消防泵房、潜污泵、电动蝶阀设备[A3301010060524921001291]","infod":"杭州市","zcinfoe":"","content":"&lt;span id=\"litPosition\"&gt;信息公开招标公告&lt;/span&gt;                  项目名称:                  杭州市城市轨道交通18号线一期工程（含杭州市城市轨道交通3号线二期、9号线二期工程）装配式消防泵房、潜污泵、电动蝶阀设备                  项目代码:                          ...","infoc":"330101","infob":"A","webdate":"2024-12-04 00:00:00","highlight":{},"score":5.0,"zcinfog":"","syscategory":"001","syscollectguid":"f63d597c-0079-48ea-9c5f-be9e940a8194","linkurl":"/jyxxgk/002001/002001001/20241204/e92bfa1b-0450-4585-b488-0c8e3c4772ba.html","id":"002001001e92bfa1b-0450-4585-b488-0c8e3c4772ba_001","sysscore":"0","infodate":"2024-12-04 02:33:46"},{"categorynum":"002001001","titlenew":"杭州市城市轨道交通3号线二期工程（含杭州市城市轨道交通9号线二期工程）自动扶梯、电梯设备[A3301010060524923001291]","infoid":"002001001621ef424-85e6-4e26-880e-fb5a8a5c4e07","attachname":"","infoa":"A99","sysclicktimes":0,"zcinfof":"","title":"杭州市城市轨道交通3号线二期工程（含杭州市城市轨道交通9号线二期工程）自动扶梯、电梯设备[A3301010060524923001291]","infod":"杭州市","zcinfoe":"","content":"&lt;span id=\"litPosition\"&gt;信息公开招标公告&lt;/span&gt;                  项目名称:                  杭州市城市轨道交通3号线二期工程（含杭州市城市轨道交通9号线二期工程）自动扶梯、电梯设备                  项目代码:                                          ...","infoc":"330101","infob":"A","webdate":"2024-12-04 00:00:00","highlight":{},"score":5.0,"zcinfog":"","syscategory":"001","syscollectguid":"f63d597c-0079-48ea-9c5f-be9e940a8194","linkurl":"/jyxxgk/002001/002001001/20241204/621ef424-85e6-4e26-880e-fb5a8a5c4e07.html","id":"002001001621ef424-85e6-4e26-880e-fb5a8a5c4e07_001","sysscore":"0","infodate":"2024-12-04 
02:33:55"},{"categorynum":"002001001","titlenew":"城东新城单元SC080201-60地块绿地工程[A3301010060400242001281]","infoid":"0020010019e7786af-a51d-4418-9279-4c8f71132ac1","attachname":"","infoa":"A99","sysclicktimes":0,"zcinfof":"","title":"城东新城单元SC080201-60地块绿地工程[A3301010060400242001281]","infod":"杭州市","zcinfoe":"","content":"&lt;span id=\"litPosition\"&gt;信息公开招标公告&lt;/span&gt;                  项目名称:                  城东新城单元SC080201-60地块绿地工程                  项目代码:                                            招标人:               ...","infoc":"330101","infob":"A","webdate":"2024-12-04 00:00:00","highlight":{},"score":5.0,"zcinfog":"","syscategory":"001","syscollectguid":"f63d597c-0079-48ea-9c5f-be9e940a8194","linkurl":"/jyxxgk/002001/002001001/20241204/9e7786af-a51d-4418-9279-4c8f71132ac1.html","id":"0020010019e7786af-a51d-4418-9279-4c8f71132ac1_001","sysscore":"0","infodate":"2024-12-04 02:34:05"},{"categorynum":"002001001","titlenew":"杭州市城市轨道交通12号线一期工程（西湖段）人防防护设备采购[A3301010060524739001291]","infoid":"00200100118f9c93f-d5d5-4eed-89ee-60e03fa39541","attachname":"","infoa":"A99","sysclicktimes":0,"zcinfof":"","title":"杭州市城市轨道交通12号线一期工程（西湖段）人防防护设备采购[A3301010060524739001291]","infod":"杭州市","zcinfoe":"","content":"&lt;span id=\"litPosition\"&gt;信息公开招标公告&lt;/span&gt;                  项目名称:                  杭州市城市轨道交通12号线一期工程（西湖段）人防防护设备采购                  项目代码:                                            招标人:        ...","infoc":"330101","infob":"A","webdate":"2024-12-04 00:00:00","highlight":{},"score":5.0,"zcinfog":"","syscategory":"001","syscollectguid":"f63d597c-0079-48ea-9c5f-be9e940a8194","linkurl":"/jyxxgk/002001/002001001/20241204/18f9c93f-d5d5-4eed-89ee-60e03fa39541.html","id":"00200100118f9c93f-d5d5-4eed-89ee-60e03fa39541_001","sysscore":"0","infodate":"2024-12-04 
02:34:25"},{"categorynum":"002001001","titlenew":"杭州市城市轨道交通18号线一期工程通风空调系统轴流风机和消声器设备[A3301010060524786001291]","infoid":"00200100172633051-e4d1-4706-94da-1cfdb8d4766a","attachname":"","infoa":"A99","sysclicktimes":0,"zcinfof":"","title":"杭州市城市轨道交通18号线一期工程通风空调系统轴流风机和消声器设备[A3301010060524786001291]","infod":"杭州市","zcinfoe":"","content":"&lt;span id=\"litPosition\"&gt;信息公开招标公告&lt;/span&gt;                  项目名称:                  杭州市城市轨道交通18号线一期工程通风空调系统轴流风机和消声器设备                  项目代码:                                            招标人:     ...","infoc":"330101","infob":"A","webdate":"2024-12-04 00:00:00","highlight":{},"score":5.0,"zcinfog":"","syscategory":"001","syscollectguid":"f63d597c-0079-48ea-9c5f-be9e940a8194","linkurl":"/jyxxgk/002001/002001001/20241204/72633051-e4d1-4706-94da-1cfdb8d4766a.html","id":"00200100172633051-e4d1-4706-94da-1cfdb8d4766a_001","sysscore":"0","infodate":"2024-12-04 02:34:15"},{"categorynum":"002001001","titlenew":"临平区中心粮库设计采购施工(EPC)工程总承包项目[A3301131280524383001212]","infoid":"002001001c61be4cf-04a2-41e3-afd8-6fb3c6a19e3e","attachname":"","infoa":"A01","sysclicktimes":0,"zcinfof":"","title":"临平区中心粮库设计采购施工(EPC)工程总承包项目[A3301131280524383001212]","infod":"临平区","zcinfoe":"","content":"&lt;span id=\"litPosition\"&gt;信息公开招标公告&lt;/span&gt;                  项目名称:                  临平区中心粮库设计采购施工(EPC)工程总承包项目                  项目代码:                                            招标人:             ...","infoc":"330113","infob":"A","webdate":"2024-12-03 17:25:00","highlight":{},"score":5.0,"zcinfog":"","syscategory":"001","syscollectguid":"f63d597c-0079-48ea-9c5f-be9e940a8194","linkurl":"/jyxxgk/002001/002001001/20241203/c61be4cf-04a2-41e3-afd8-6fb3c6a19e3e.html","id":"002001001c61be4cf-04a2-41e3-afd8-6fb3c6a19e3e_001","sysscore":"0","infodate":"2024-12-03 
18:10:26"},{"categorynum":"002001001","titlenew":"杭州中国太平金融大厦项目1幢7、8层精装修工程[A3301020070524917001211]","infoid":"0020010016be84086-b130-470a-98bb-1e774a697fc5","attachname":"","infoa":"A01","sysclicktimes":0,"zcinfof":"","title":"杭州中国太平金融大厦项目1幢7、8层精装修工程[A3301020070524917001211]","infod":"上城区","zcinfoe":"","content":"&lt;span id=\"litPosition\"&gt;信息公开招标公告&lt;/span&gt;                  项目名称:                  杭州中国太平金融大厦项目1幢7、8层精装修工程                  项目代码:                                            招标人:               ...","infoc":"330102","infob":"A","webdate":"2024-12-03 17:00:00","highlight":{},"score":5.0,"zcinfog":"","syscategory":"001","syscollectguid":"f63d597c-0079-48ea-9c5f-be9e940a8194","linkurl":"/jyxxgk/002001/002001001/20241203/6be84086-b130-470a-98bb-1e774a697fc5.html","id":"0020010016be84086-b130-470a-98bb-1e774a697fc5_001","sysscore":"0","infodate":"2024-12-03 18:10:06"},{"categorynum":"002001001","titlenew":"杭州余杭泗溪股份经济合作社配套用房项目[A3301100140524943001211]","infoid":"00200100184295d9e-1a17-430c-878b-a0ea1f76376b","attachname":"","infoa":"A01","sysclicktimes":0,"zcinfof":"","title":"杭州余杭泗溪股份经济合作社配套用房项目[A3301100140524943001211]","infod":"余杭区","zcinfoe":"","content":"&lt;span id=\"litPosition\"&gt;信息公开招标公告&lt;/span&gt;                  项目名称:                  杭州余杭泗溪股份经济合作社配套用房项目                  项目代码:                                            招标人:                  名...","infoc":"330110","infob":"A","webdate":"2024-12-03 16:40:00","highlight":{},"score":5.0,"zcinfog":"","syscategory":"001","syscollectguid":"f63d597c-0079-48ea-9c5f-be9e940a8194","linkurl":"/jyxxgk/002001/002001001/20241203/84295d9e-1a17-430c-878b-a0ea1f76376b.html","id":"00200100184295d9e-1a17-430c-878b-a0ea1f76376b_001","sysscore":"0","infodate":"2024-12-03 
18:20:56"},{"categorynum":"002001001","titlenew":"江海商务中心一期监理[A3301300180524911001211]","infoid":"002001001b9960cb3-ae0a-44ff-9d5d-f7b8e2e930b8","attachname":"","infoa":"A01","sysclicktimes":0,"zcinfof":"","title":"江海商务中心一期监理[A3301300180524911001211]","infod":"杭州经济技术开发区","zcinfoe":"","content":"&lt;span id=\"litPosition\"&gt;信息公开招标公告&lt;/span&gt;                  项目名称:                  江海商务中心一期监理                  项目代码:                                            招标人:                  名称:杭州江海辰星置...","infoc":"330130","infob":"A","webdate":"2024-12-03 16:40:00","highlight":{},"score":5.0,"zcinfog":"","syscategory":"001","syscollectguid":"f63d597c-0079-48ea-9c5f-be9e940a8194","linkurl":"/jyxxgk/002001/002001001/20241203/b9960cb3-ae0a-44ff-9d5d-f7b8e2e930b8.html","id":"002001001b9960cb3-ae0a-44ff-9d5d-f7b8e2e930b8_001","sysscore":"0","infodate":"2024-12-03 17:52:56"},{"categorynum":"002001001","titlenew":"青龙路（头蓬快速路—东升路）道路工程监理[A3301300180524879001221]","infoid":"00200100117e099ff-d25a-4c2d-abad-881c4e0d68fa","attachname":"","infoa":"A02","sysclicktimes":0,"zcinfof":"","title":"青龙路（头蓬快速路—东升路）道路工程监理[A3301300180524879001221]","infod":"杭州经济技术开发区","zcinfoe":"","content":"&lt;span id=\"litPosition\"&gt;信息公开招标公告&lt;/span&gt;                  项目名称:                  青龙路（头蓬快速路—东升路）道路工程监理                  项目代码:                                            招标人:                  ...","infoc":"330130","infob":"A","webdate":"2024-12-03 16:40:00","highlight":{},"score":5.0,"zcinfog":"","syscategory":"001","syscollectguid":"f63d597c-0079-48ea-9c5f-be9e940a8194","linkurl":"/jyxxgk/002001/002001001/20241203/17e099ff-d25a-4c2d-abad-881c4e0d68fa.html","id":"00200100117e099ff-d25a-4c2d-abad-881c4e0d68fa_001","sysscore":"0","infodate":"2024-12-03 
18:09:16"},{"categorynum":"002001001","titlenew":"仓兴街(沙河港-云联路)新建工程设计[A3301100140524891001221]","infoid":"002001001bf33ad26-6036-4919-8093-3b9b43dfaa71","attachname":"","infoa":"A02","sysclicktimes":0,"zcinfof":"","title":"仓兴街(沙河港-云联路)新建工程设计[A3301100140524891001221]","infod":"余杭区","zcinfoe":"","content":"&lt;span id=\"litPosition\"&gt;信息公开招标公告&lt;/span&gt;                  项目名称:                  仓兴街(沙河港-云联路)新建工程设计                  项目代码:                                            招标人:                  名称...","infoc":"330110","infob":"A","webdate":"2024-12-03 16:30:00","highlight":{},"score":5.0,"zcinfog":"","syscategory":"001","syscollectguid":"f63d597c-0079-48ea-9c5f-be9e940a8194","linkurl":"/jyxxgk/002001/002001001/20241203/bf33ad26-6036-4919-8093-3b9b43dfaa71.html","id":"002001001bf33ad26-6036-4919-8093-3b9b43dfaa71_001","sysscore":"0","infodate":"2024-12-03 18:09:26"},{"categorynum":"002001001","titlenew":"瓶窑镇社区卫生服务中心迁建[A3301100140524897001211]","infoid":"002001001ae264fc7-a182-4281-9b79-25eed700379c","attachname":"","infoa":"A01","sysclicktimes":0,"zcinfof":"","title":"瓶窑镇社区卫生服务中心迁建[A3301100140524897001211]","infod":"余杭区","zcinfoe":"","content":"&lt;span id=\"litPosition\"&gt;信息公开招标公告&lt;/span&gt;                  项目名称:                  瓶窑镇社区卫生服务中心迁建                  项目代码:                                            招标人:                  名称:杭州市余...","infoc":"330110","infob":"A","webdate":"2024-12-03 15:50:00","highlight":{},"score":5.0,"zcinfog":"","syscategory":"001","syscollectguid":"f63d597c-0079-48ea-9c5f-be9e940a8194","linkurl":"/jyxxgk/002001/002001001/20241203/ae264fc7-a182-4281-9b79-25eed700379c.html","id":"002001001ae264fc7-a182-4281-9b79-25eed700379c_001","sysscore":"0","infodate":"2024-12-03 17:51:56"}],"analyze":[],"maxScore":5.0,"scorllId":"1733241000000;","executetime":"0.032"}}
        """


def parseReponse(responseText=None):
    """Turn a search-API response body into a list of section/link pairs.

    The body is expected to be JSON shaped like
    ``{"result": {"records": [{"title": ..., "linkurl": ...}, ...]}}``.
    For every record whose title carries a bracketed section number (see
    ``getSectionNo``) one ``{"sectionNo": <number>, "url": <linkurl>}`` dict
    is produced. Records that cannot be used are reported with ``print`` and
    skipped, so a single malformed record does not abort the whole page.

    (The misspelt name is kept because callers reference it.)

    Args:
        responseText: Raw JSON text of the response. When ``None``, the
            built-in captured sample ``_SAMPLE_RESPONSE_TEXT`` is parsed
            instead, which allows running the parser without network access.

    Returns:
        A list of ``{"sectionNo": str, "url": str}`` dicts; empty when the
        body has no usable ``result``/``records``.

    Raises:
        json.JSONDecodeError: If the text is not valid JSON.
    """
    if responseText is None:
        responseText = _SAMPLE_RESPONSE_TEXT
    # strict=False tolerates raw control characters (e.g. line breaks) inside
    # JSON string values, which the captured sample contains.
    response = json.loads(responseText, strict=False)

    # "result" / "records" may be absent or null; both mean "nothing to parse".
    result = response.get('result') if isinstance(response, dict) else None
    if not isinstance(result, dict):
        return []

    sectionList = []
    for item in result.get('records') or []:
        title = item.get('title') or ""
        number = getSectionNo(title)
        if number is None:
            print(f"标题不能解析出标段编号:{title}")
            continue
        linkurl = item.get('linkurl')
        if not linkurl:
            # Without a link there is no detail page to look for an attachment on.
            print(f"记录缺少链接地址:{title}")
            continue
        sectionList.append({"sectionNo": number, "url": linkurl})
    return sectionList


def getSectionNo(title):
    """Extract the bracketed section number from an announcement title.

    Looks for the first ``[A<digits>]`` token in ``title`` and returns it
    without the surrounding brackets, e.g. ``"...[A33010100]"`` gives
    ``"A33010100"``. Returns ``None`` when the title contains no such token.
    """
    found = re.search(r'\[A\d+\]', title)
    if not found:
        return None
    # The matched text is always "[" + "A<digits>" + "]", so dropping the
    # first and last character removes exactly the two brackets.
    return found.group(0)[1:-1]


def downFiles(pageIndex: int = 1, region: str = "3301"):
    """Collect attachment URLs for one page of search results.

    Despite its name this function downloads no files: it fetches the
    records of page ``pageIndex`` for ``region`` via ``loadJsgcZbgg``, asks
    ``extraAttachFile`` for the attachment link of each record's detail
    page, and gathers the hits.

    Records without an attachment link are skipped silently; a record that
    has a link but no ``sectionNo`` key is printed and skipped.

    Returns:
        A list of ``{"section": <sectionNo>, "docUrl": <attachment URL>}``
        dicts. The list is also printed before being returned.
    """
    collected = []
    for entry in loadJsgcZbgg(pageIndex, region):
        attachment = extraAttachFile(entry['url'])
        if attachment is None:
            continue
        if 'sectionNo' not in entry:
            # Dump the odd record so it can be inspected by hand.
            print(entry)
            continue
        collected.append({"section": entry['sectionNo'], "docUrl": attachment})
    print(collected)
    return collected


def extraAttachFile(url, timeout: float = 30):
    """Find the attachment link on an announcement detail page.

    Fetches ``baseUrl + url``, parses the HTML and looks for the
    ``<a id="attachName">`` element.

    Args:
        url: Site-relative path of the detail page (a record's ``linkurl``).
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The absolute URL the anchor points to, or ``None`` when the page has
        no such anchor or the anchor has no (or an empty) ``href``.

    Raises:
        requests.RequestException: On connection failure or timeout.
    """
    pageUrl = f"{baseUrl}" + url
    # requests has no default timeout; without one a dead connection blocks forever.
    response = requests.get(pageUrl, timeout=timeout)
    soup = BeautifulSoup(response.text, 'html.parser')
    anchor = soup.find("a", attrs={"id": "attachName"})
    if anchor is None:
        return None
    href = anchor.get("href")
    if not href:
        return None
    # Resolve against the page URL instead of gluing strings together: a
    # root-relative href ("/x/y") yields the same result as before, while an
    # absolute or page-relative href is now resolved correctly too.
    return urljoin(pageUrl, href)


def fetch_biddoc_file_url(maxPages: int = 9,
                          outputPath: str = "down_biddoc_from_zj_result.json"):
    """Collect attachment URLs from several result pages and save them as JSON.

    Calls ``downFiles`` for pages ``1 .. maxPages`` (default region),
    concatenates the returned lists, prints the JSON text and writes it to
    ``outputPath``.

    Args:
        maxPages: Number of result pages to walk, starting at page 1.
        outputPath: File the JSON array is written to (UTF-8, overwritten).
    """
    result = []
    for pageIndex in range(1, maxPages + 1):
        result.extend(downFiles(pageIndex))
    content = json.dumps(result)
    print(content)
    with open(outputPath, mode="w", encoding="utf-8") as f:
        f.write(content)


def downfiles(item, timeout: float = 60):
    """Download one document into ``<PROJECT_DIR>/data/files``.

    The file is named ``<item['section']><extension>``, where the extension
    is taken from the path part of ``item['docUrl']``. If that file already
    exists, nothing is downloaded.

    The body is streamed into a temporary ``.part`` file and only renamed to
    the final name once the download completed, so an interrupted transfer
    never leaves a truncated file that later runs would mistake for a
    finished download.

    Args:
        item: Dict with the keys ``"section"`` (file base name) and
            ``"docUrl"`` (absolute URL to download).
        timeout: Seconds to wait for the server (connect / between reads).

    Raises:
        requests.RequestException: On connection failure, timeout, or an
            HTTP error status.
        OSError: If the file cannot be written.
    """
    DOWN_FILE = os.path.join(PROJECT_DIR, "data", "files")
    os.makedirs(DOWN_FILE, exist_ok=True)

    url = item['docUrl']
    # Use only the URL's path so a query string or fragment cannot leak into
    # the file name (e.g. ".pdf?x=1").
    _, file_extension = os.path.splitext(urlsplit(url).path)
    file_path = os.path.join(DOWN_FILE, item['section'] + file_extension)
    if os.path.exists(file_path):
        print(f"文件已经存在:{file_path}")
        return

    temp_path = file_path + ".part"
    try:
        # stream=True: fetch the body in chunks instead of loading it into memory.
        with requests.get(url, stream=True, timeout=timeout) as response:
            response.raise_for_status()
            with open(temp_path, 'wb') as file:
                for chunk in response.iter_content(chunk_size=8192):
                    file.write(chunk)
        # Atomic on the same filesystem: the final name appears only when complete.
        os.replace(temp_path, file_path)
    finally:
        # Still present only if something failed before the rename.
        if os.path.exists(temp_path):
            os.remove(temp_path)
    print(f"下载文件成功:{url}")


if __name__ == "__main__":
    # Script entry: read the URL list previously written by
    # fetch_biddoc_file_url() and download a fixed slice of it.
    with open("down_biddoc_from_zj_result.json", mode="r", encoding="utf-8") as f:
        sectionList = json.load(f)
    # Only entries 9..29 are processed; adjust the slice to download others.
    for sec in sectionList[9:30]:
        downfiles(sec)
